20220506_10x_RZB

Lung ILCs (immune cells with partly residual T/B which need be kept in this version)
loading 50k cells, demo only get 13,474
(to increase datasize could get some more )

0604: plus data called 20k more in cellranger, total 35,367 cells
(as BAS/EOS/NEU relatively get much lower nGene comparing with ILCs/Macrophages)
(to get much better resolution of those granulocytes, may should increase datasize by 1x to 3x)

20240407
rerun initial process with currently modified workflow for the two-year-ago data
separate LUNG and BALF after preAnno

individual LUNG data plot several figures

source("/Shared_win/projects/RNA_normal/analysis.10x.r")

load 10x data

filt.10x <- Read10X(data.dir = "J:/projects_local/projects/20220506_10x_RZB/output_jiace0518/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`

check datasets

dim(GEX)
## [1] 32285 35367
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##         AAACCCAAGACATAAC-1 AAACCCAAGAGCATTA-1 AAACCCAAGCATCAAA-1
## Xkr4                     .                  .                  .
## Gm1992                   .                  .                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
##         AAACCCAAGCCGGAAT-1 AAACCCAAGCTAAACA-1 AAACCCAAGGTAAGGA-1
## Xkr4                     .                  .                  .
## Gm1992                   .                  .                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
dim(FB)
## [1]     8 35367
FB[,1:6]
## 8 x 6 sparse Matrix of class "dgCMatrix"
##                    AAACCCAAGACATAAC-1 AAACCCAAGAGCATTA-1 AAACCCAAGCATCAAA-1
## hashtag1_TotalSeqB                  8                 11               4566
## hashtag2_TotalSeqB                 33                 11                132
## hashtag3_TotalSeqB                 20                110                 46
## hashtag4_TotalSeqB                  8                  4                238
## hashtag5_TotalSeqB                 82                  6                 24
## hashtag6_TotalSeqB                 11                  4                 31
## hashtag7_TotalSeqB                  5                139                 26
## hashtag8_TotalSeqB                 24                 14                 64
##                    AAACCCAAGCCGGAAT-1 AAACCCAAGCTAAACA-1 AAACCCAAGGTAAGGA-1
## hashtag1_TotalSeqB                 15                 29                 18
## hashtag2_TotalSeqB                 14                452                388
## hashtag3_TotalSeqB                 10                 22                 12
## hashtag4_TotalSeqB                  7                 12                  7
## hashtag5_TotalSeqB                  9                  8                  6
## hashtag6_TotalSeqB                 17               1168                 10
## hashtag7_TotalSeqB                 11                 12                  5
## hashtag8_TotalSeqB                179                 20                 13
# change the rownames as sample names 
rownames(FB) <- c("LUNG.CTL1","LUNG.CTL2",
                  "LUNG.CKO1","LUNG.CKO2",
                  "BALF.CTL1","BALF.CTL2",
                  "BALF.CKO1","BALF.CKO2")
dim(FB)
## [1]     8 35367
FB[,1:6]
## 8 x 6 sparse Matrix of class "dgCMatrix"
##           AAACCCAAGACATAAC-1 AAACCCAAGAGCATTA-1 AAACCCAAGCATCAAA-1
## LUNG.CTL1                  8                 11               4566
## LUNG.CTL2                 33                 11                132
## LUNG.CKO1                 20                110                 46
## LUNG.CKO2                  8                  4                238
## BALF.CTL1                 82                  6                 24
## BALF.CTL2                 11                  4                 31
## BALF.CKO1                  5                139                 26
## BALF.CKO2                 24                 14                 64
##           AAACCCAAGCCGGAAT-1 AAACCCAAGCTAAACA-1 AAACCCAAGGTAAGGA-1
## LUNG.CTL1                 15                 29                 18
## LUNG.CTL2                 14                452                388
## LUNG.CKO1                 10                 22                 12
## LUNG.CKO2                  7                 12                  7
## BALF.CTL1                  9                  8                  6
## BALF.CTL2                 17               1168                 10
## BALF.CKO1                 11                 12                  5
## BALF.CKO2                179                 20                 13
rowSums(FB)
## LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2 
##   5088467   6731641   5112068   2865750   2650375   3758840   3147575   4466945
rowMeans(FB)
## LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2 
## 143.87613 190.33678 144.54344  81.02893  74.93921 106.28100  88.99751 126.30263
apply(FB,1,max)
## LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2 
##     18197     12828     10479      5878      8190     12873      6653     15314
apply(FB,1,median)
## LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2 
##        21        30        23        11        11        16        14        23
rowSums(FB>0)
## LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2 
##     35357     35358     35355     35280     35273     35355     35339     35357

FB

# shorten rownames    
#rownames(FB) <- paste0("hashtag",1:8)

# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "Lung_FB")

FB.seur
## An object of class Seurat 
## 8 features across 35367 samples within 1 assay 
## Active assay: RNA (8 features, 0 variable features)
rownames(FB.seur)
## [1] "LUNG.CTL1" "LUNG.CTL2" "LUNG.CKO1" "LUNG.CKO2" "BALF.CTL1" "BALF.CTL2"
## [7] "BALF.CKO1" "BALF.CKO2"

normalization

perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html

# normalize
FB.seur <- NormalizeData(FB.seur)
FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data  
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features

check demux

first define FB colors based on conditions

color.FB <- c(ggsci::pal_d3("category20c")(20)[c(1,6,2,7)],
              ggsci::pal_d3("category20b")(20)[c(7,12,13,18)])

#color.FB <- color.FB[c()]
color.FBraw <-  c("#FF6C91","lightgrey",color.FB)


color.cnt <- color.FB[c(1,3,5,7)]               
scales::show_col(color.FB, ncol = 4)

scales::show_col(color.FBraw, ncol = 6)

scales::show_col(color.cnt, ncol = 2)

par(mfrow=c(2,4))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])

plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])

par(mfrow=c(2,4))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
plot(sort(t(FB)[,2],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
plot(sort(t(FB)[,3],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
plot(sort(t(FB)[,4],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])

plot(sort(t(FB)[,5],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
plot(sort(t(FB)[,6],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
plot(sort(t(FB)[,7],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
plot(sort(t(FB)[,8],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])

range ‘q’

q0.50 ~ 0.99
#table.demux
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))

plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("CTL|CKO",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")

plot(table.demux$quantile, pch=16, ylab="mod-quantile")
#plot.new()

plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])
plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])

#plot(table.demux[,2+9], type="p", col=color.FB[9], pch=16, ylab=colnames(table.demux)[2+9])
#plot(table.demux[,2+10], type="p", col=color.FB[10], pch=16, ylab=colnames(table.demux)[2+10])
q0.9900 ~ 0.9999
options(max.print=5000)
#table.demux.s
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))


plot(table.demux.s$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux.s$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux.s[,grep("CTL|CKO",colnames(table.demux.s))]), type="p", col="#306000", pch=16, ylab="Singlet")

plot(table.demux.s$quantile, pch=16, ylab="mod-quantile")
#plot.new()

plot(table.demux.s[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux.s)[2+1])
plot(table.demux.s[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux.s)[2+2])
plot(table.demux.s[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux.s)[2+3])
plot(table.demux.s[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux.s)[2+4])
plot(table.demux.s[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux.s)[2+5])
plot(table.demux.s[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux.s)[2+6])
plot(table.demux.s[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux.s)[2+7])
plot(table.demux.s[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux.s)[2+8])

#plot(table.demux.s[,2+9], type="p", col=color.FB[9], pch=16, ylab=colnames(table.demux.s)[2+9])
#plot(table.demux.s[,2+10], type="p", col=color.FB[10], pch=16, ylab=colnames(table.demux.s)[2+10])

demultiplexing

check q0.90~q0.99
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.90)
## Cutoff for LUNG.CTL1 : 78 reads
## Cutoff for LUNG.CTL2 : 105 reads
## Cutoff for LUNG.CKO1 : 82 reads
## Cutoff for LUNG.CKO2 : 43 reads
## Cutoff for BALF.CTL1 : 37 reads
## Cutoff for BALF.CTL2 : 58 reads
## Cutoff for BALF.CKO1 : 53 reads
## Cutoff for BALF.CKO2 : 82 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    13131      489    21747
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##   Doublet  Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 
##     13131       489      2773      2598      2813      2953      2730      2460 
## BALF.CKO1 BALF.CKO2 
##      2907      2513
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.91)
## Cutoff for LUNG.CTL1 : 81 reads
## Cutoff for LUNG.CTL2 : 109 reads
## Cutoff for LUNG.CKO1 : 86 reads
## Cutoff for LUNG.CKO2 : 45 reads
## Cutoff for BALF.CTL1 : 38 reads
## Cutoff for BALF.CTL2 : 61 reads
## Cutoff for BALF.CKO1 : 55 reads
## Cutoff for BALF.CKO2 : 86 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    13043      538    21786
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##   Doublet  Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 
##     13043       538      2773      2610      2807      2962      2733      2463 
## BALF.CKO1 BALF.CKO2 
##      2920      2518
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.92)
## Cutoff for LUNG.CTL1 : 85 reads
## Cutoff for LUNG.CTL2 : 114 reads
## Cutoff for LUNG.CKO1 : 90 reads
## Cutoff for LUNG.CKO2 : 47 reads
## Cutoff for BALF.CTL1 : 40 reads
## Cutoff for BALF.CTL2 : 64 reads
## Cutoff for BALF.CKO1 : 58 reads
## Cutoff for BALF.CKO2 : 90 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    12922      623    21822
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##   Doublet  Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 
##     12922       623      2782      2613      2808      2971      2722      2477 
## BALF.CKO1 BALF.CKO2 
##      2930      2519
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.93)
## Cutoff for LUNG.CTL1 : 90 reads
## Cutoff for LUNG.CTL2 : 120 reads
## Cutoff for LUNG.CKO1 : 94 reads
## Cutoff for LUNG.CKO2 : 50 reads
## Cutoff for BALF.CTL1 : 42 reads
## Cutoff for BALF.CTL2 : 67 reads
## Cutoff for BALF.CKO1 : 61 reads
## Cutoff for BALF.CKO2 : 94 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    12804      722    21841
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##   Doublet  Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 
##     12804       722      2783      2616      2804      2985      2720      2493 
## BALF.CKO1 BALF.CKO2 
##      2936      2504
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.94)
## Cutoff for LUNG.CTL1 : 95 reads
## Cutoff for LUNG.CTL2 : 127 reads
## Cutoff for LUNG.CKO1 : 99 reads
## Cutoff for LUNG.CKO2 : 53 reads
## Cutoff for BALF.CTL1 : 44 reads
## Cutoff for BALF.CTL2 : 71 reads
## Cutoff for BALF.CKO1 : 65 reads
## Cutoff for BALF.CKO2 : 100 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    12688      875    21804
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##   Doublet  Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 
##     12688       875      2773      2614      2792      2978      2721      2500 
## BALF.CKO1 BALF.CKO2 
##      2945      2481
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.95)
## Cutoff for LUNG.CTL1 : 101 reads
## Cutoff for LUNG.CTL2 : 135 reads
## Cutoff for LUNG.CKO1 : 106 reads
## Cutoff for LUNG.CKO2 : 57 reads
## Cutoff for BALF.CTL1 : 47 reads
## Cutoff for BALF.CTL2 : 75 reads
## Cutoff for BALF.CKO1 : 69 reads
## Cutoff for BALF.CKO2 : 106 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    12525     1044    21798
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##   Doublet  Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 
##     12525      1044      2769      2605      2794      2984      2714      2513 
## BALF.CKO1 BALF.CKO2 
##      2953      2466
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.96)
## Cutoff for LUNG.CTL1 : 108 reads
## Cutoff for LUNG.CTL2 : 145 reads
## Cutoff for LUNG.CKO1 : 113 reads
## Cutoff for LUNG.CKO2 : 61 reads
## Cutoff for BALF.CTL1 : 50 reads
## Cutoff for BALF.CTL2 : 81 reads
## Cutoff for BALF.CKO1 : 74 reads
## Cutoff for BALF.CKO2 : 113 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    12320     1262    21785
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##   Doublet  Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 
##     12320      1262      2743      2604      2788      2991      2702      2533 
## BALF.CKO1 BALF.CKO2 
##      2971      2453
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.97)
## Cutoff for LUNG.CTL1 : 117 reads
## Cutoff for LUNG.CTL2 : 157 reads
## Cutoff for LUNG.CKO1 : 123 reads
## Cutoff for LUNG.CKO2 : 67 reads
## Cutoff for BALF.CTL1 : 55 reads
## Cutoff for BALF.CTL2 : 88 reads
## Cutoff for BALF.CKO1 : 81 reads
## Cutoff for BALF.CKO2 : 123 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    12067     1642    21658
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##   Doublet  Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 
##     12067      1642      2723      2604      2786      2988      2657      2546 
## BALF.CKO1 BALF.CKO2 
##      2966      2388
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.98)
## Cutoff for LUNG.CTL1 : 131 reads
## Cutoff for LUNG.CTL2 : 175 reads
## Cutoff for LUNG.CKO1 : 136 reads
## Cutoff for LUNG.CKO2 : 75 reads
## Cutoff for BALF.CTL1 : 61 reads
## Cutoff for BALF.CTL2 : 97 reads
## Cutoff for BALF.CKO1 : 90 reads
## Cutoff for BALF.CKO2 : 137 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    11692     2205    21470
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##   Doublet  Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 
##     11692      2205      2680      2610      2764      2985      2612      2579 
## BALF.CKO1 BALF.CKO2 
##      2951      2289
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for LUNG.CTL1 : 153 reads
## Cutoff for LUNG.CTL2 : 206 reads
## Cutoff for LUNG.CKO1 : 160 reads
## Cutoff for LUNG.CKO2 : 88 reads
## Cutoff for BALF.CTL1 : 71 reads
## Cutoff for BALF.CTL2 : 114 reads
## Cutoff for BALF.CKO1 : 107 reads
## Cutoff for BALF.CKO2 : 160 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    10966     3395    21006
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##   Doublet  Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 
##     10966      3395      2576      2621      2666      2984      2517      2629 
## BALF.CKO1 BALF.CKO2 
##      2891      2122

manual cutoffs

## q0.99 
#cutoff.FB <- c(153,206,160,88,
#               71,114,107,160)
#
## ref to  
# LUNG.CTL1  0.93
# LUNG.CTL2  0.94
# LUNG.CKO1  0.93/0.91
# LUNG.CKO2  0.99/0.97
# BALF.CTL1  0.91/0.93
# BALF.CTL2  0.99
# BALF.CKO1  0.96
# BALF.CKO2  0.92

# manual 
cutoff.FB <- c(90,127,86,67,
               42,114,74,90)
names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
## LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2 
##        90       127        86        67        42       114        74        90
data.frame(cutoff=cutoff.FB)
##           cutoff
## LUNG.CTL1     90
## LUNG.CTL2    127
## LUNG.CKO1     86
## LUNG.CKO2     67
## BALF.CTL1     42
## BALF.CTL2    114
## BALF.CKO1     74
## BALF.CKO2     90
par(mfrow=c(2,4))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])

plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])

plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])

par(mfrow=c(2,4))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])

plot(sort(t(FB)[,5], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])

plot(sort(t(FB)[,6], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])

## custom cutoff run this
custom.Demux <- function(FBobj,FBcutoff){
  # define decision matrix
  dd <- FBobj@assays[['HTO']]@counts
  dd <- apply(dd, 2, function(x){
    x > FBcutoff
  })
  x1 <- colSums(dd)
  x1[x1>1] <- "Doublet"
  x1[x1==0] <- "Negative"
  x1[x1==1] <- "Singlet"

  x2 <- x1
  
  bc.slt  <- names(x1)[x1=="Singlet"]
  
  for(bc in bc.slt){
    x2[bc] <- rownames(dd)[dd[,bc]]
  }
  
  FBdf <- data.frame(HTO_classification.global=factor(x1,
                                                      levels = c("Doublet","Negative","Singlet")),
                     hash.ID=factor(x2,
                                    levels = c("Doublet","Negative",rownames(dd))))
  return(FBdf)
}

df.FB <- custom.Demux(FB.seur,cutoff.FB)
## custom cutoff run this
dim(df.FB)
## [1] 35367     2
df.FB[1:10,]
##                    HTO_classification.global   hash.ID
## AAACCCAAGACATAAC-1                   Singlet BALF.CTL1
## AAACCCAAGAGCATTA-1                   Doublet   Doublet
## AAACCCAAGCATCAAA-1                   Doublet   Doublet
## AAACCCAAGCCGGAAT-1                   Singlet BALF.CKO2
## AAACCCAAGCTAAACA-1                   Doublet   Doublet
## AAACCCAAGGTAAGGA-1                   Singlet LUNG.CTL2
## AAACCCAAGGTCACCC-1                   Doublet   Doublet
## AAACCCAAGTGGCCTC-1                   Doublet   Doublet
## AAACCCACAAACGTGG-1                   Singlet LUNG.CKO2
## AAACCCACAACACAAA-1                   Singlet LUNG.CTL2
## custom cutoff run this
table(df.FB)
##                          hash.ID
## HTO_classification.global Doublet Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1
##                  Doublet    12663        0         0         0         0
##                  Negative       0      902         0         0         0
##                  Singlet        0        0      2814      2626      2859
##                          hash.ID
## HTO_classification.global LUNG.CKO2 BALF.CTL1 BALF.CTL2 BALF.CKO1 BALF.CKO2
##                  Doublet          0         0         0         0         0
##                  Negative         0         0         0         0         0
##                  Singlet       2905      2745      2399      2922      2532
## custom cutoff run this
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
##                    orig.ident nCount_RNA nFeature_RNA nCount_HTO nFeature_HTO
## AAACCCAAGACATAAC-1    Lung_FB        191            8        191            8
## AAACCCAAGAGCATTA-1    Lung_FB        299            8        299            8
## AAACCCAAGCATCAAA-1    Lung_FB       5127            8       5127            8
## AAACCCAAGCCGGAAT-1    Lung_FB        262            8        262            8
##                    HTO_maxID HTO_secondID HTO_margin  HTO_classification
## AAACCCAAGACATAAC-1 BALF.CTL1    LUNG.CTL2  1.1966616           BALF.CTL1
## AAACCCAAGAGCATTA-1 BALF.CKO1    LUNG.CKO1  0.6018605           BALF.CKO1
## AAACCCAAGCATCAAA-1 LUNG.CTL1    LUNG.CKO2  2.2958784 LUNG.CKO2_LUNG.CTL1
## AAACCCAAGCCGGAAT-1 BALF.CKO2    BALF.CTL2  1.3042646           BALF.CKO2
##                    HTO_classification.global   hash.ID
## AAACCCAAGACATAAC-1                   Singlet BALF.CTL1
## AAACCCAAGAGCATTA-1                   Doublet   Doublet
## AAACCCAAGCATCAAA-1                   Doublet   Doublet
## AAACCCAAGCCGGAAT-1                   Singlet BALF.CKO2
## default q0.99 run this
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
#                                            levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
#                          levels = c("Doublet","Negative",rownames(FB.seur)))
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,25000),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+1550,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

RidgePlot

with ridge plots, raw
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))

RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=4,
          cols = color.FB) 

with ridge plots, filtered
FB.seur@meta.data$hash.ID.sort <- factor(as.character(FB.seur@meta.data$hash.ID),levels = c("Doublet","Negative",levels(FB.seur@active.ident)))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=4,same.y.lims= TRUE,
          cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID.sort") 

tSNE for HTOs

# first, remove negative cells from th object
#FB.seur.subset <- FB.seur

# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"

#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
#FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:8, perplexity = 500, check_duplicates = FALSE)


#saveRDS(FB.seur.subset, "FB0407.seur.subset.rds")
FB.seur.subset <- readRDS("FB0407.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order = rev(rownames(FB.seur)),
        cols = color.FB) + labs(title = "FB Max_ID"), 
DimPlot(FB.seur.subset, order = c("Doublet",rev(rownames(FB.seur)),"Negative"), group.by = 'hash.ID.sort', label = F,
        cols = c("lightgrey",color.FB,"#FF6C91")) + 
  labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)

stat

Idents(FB.seur) <- "HTO_classification.global"
FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),levels = c("Doublet","Negative","Singlet"))
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#FF6C91","lightgrey","#7CAE00")) &
  geom_jitter(alpha=0.15, shape=16, width = 0.3 ,size = 0.12)

table(FB.seur@meta.data$hash.ID.sort)
## 
##   Doublet  Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 
##     12663       902      2814      2626      2859      2905      2745      2399 
## BALF.CKO1 BALF.CKO2 
##      2922      2532
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,15500),col = color.FBraw,
        main = "Feature Barcode statistics",cex.names = 0.75)
text(x=1:10*1.2-0.45,y=sl_stat+645,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

GEX

mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html

GEX.seur <- CreateSeuratObject(counts = GEX,
                               min.cells = 3,
                               min.features = 100,
                               project = "Lung_GEX")
GEX.seur
## An object of class Seurat 
## 18826 features across 35358 samples within 1 assay 
## Active assay: RNA (18826 features, 0 variable features)

filtering

MT genes

grep("^mt-",rownames(GEX),value = T)
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)

RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)

# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc

add FB info

GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
## 
##   Doublet  Negative LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2 BALF.CTL1 BALF.CTL2 
##     12663       898      2814      2625      2858      2904      2744      2399 
## BALF.CKO1 BALF.CKO2 
##      2921      2532
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative")
GEX.seur
## An object of class Seurat 
## 18826 features across 21797 samples within 1 assay 
## Active assay: RNA (18826 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

filtering

## keep low nFeature for granulocytes specifically neutrophils 
#GEX.seur <- subset(GEX.seur, subset = percent.mt < 15 & nFeature_RNA >= 500 & nFeature_RNA < 6000 & nCount_RNA < 40000)
GEX.seur <- subset(GEX.seur, subset = percent.mt < 15 & nFeature_RNA >= 100 & nFeature_RNA < 6000 & nCount_RNA < 32000)
GEX.seur
## An object of class Seurat 
## 18826 features across 21447 samples within 1 assay 
## Active assay: RNA (18826 features, 0 variable features)
filtered obj
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,14800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+875,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,4800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+325,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

check cellcycle

s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))

GEX.seur <- CellCycleScoring(GEX.seur,
                             s.features = s.genes,
                             g2m.features = g2m.genes)
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info", 
    ncol = 2, pt.size = 0.1)

GEX.seur.cc <- GEX.seur

GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)

GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')

Markers and Clusters

Normalizing

GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(GEX.seur), 10)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
plot1 + plot2

need to remove TCR/BCR genes as DIGs

head(VariableFeatures(GEX.seur), 200)
##   [1] "Igkc"          "Ighm"          "Jchain"        "Mzb1"         
##   [5] "Ccl22"         "Retnla"        "Gm15056"       "Mfge8"        
##   [9] "Mucl1"         "Aldh1a2"       "Ccl7"          "Ccl8"         
##  [13] "Hmox1"         "Il13"          "Areg"          "Ngp"          
##  [17] "Iglc2"         "Ccl17"         "Ccl2"          "Il12b"        
##  [21] "Fscn1"         "Ly6d"          "C1qa"          "Camp"         
##  [25] "Iglc1"         "Spp1"          "AY761185"      "Ltf"          
##  [29] "Pf4"           "Calca"         "Igha"          "Retnlg"       
##  [33] "Pou2af1"       "Ccl1"          "Cxcl10"        "Tpsab1"       
##  [37] "Tph1"          "Clu"           "Klk1"          "Cpa3"         
##  [41] "Rbp4"          "Cd209e"        "Arg1"          "Fkbp11"       
##  [45] "Apoe"          "Cd209d"        "Siglech"       "Ccr7"         
##  [49] "Cxcl3"         "Csf2"          "Igkv14-126"    "Timp1"        
##  [53] "Ccl24"         "Fabp4"         "Ms4a1"         "C1qc"         
##  [57] "Cxcl9"         "Marco"         "Il5"           "Mgl2"         
##  [61] "C1qb"          "Ly6a"          "Ldoc1"         "Cd209a"       
##  [65] "Il6"           "Gpnmb"         "Il4i1"         "Cxcl1"        
##  [69] "Inhba"         "Ccr9"          "Mmp12"         "Il17a"        
##  [73] "Cd79a"         "Ccl12"         "Lyz1"          "Mmp13"        
##  [77] "Mcpt8"         "Saa3"          "Ch25h"         "Iglc3"        
##  [81] "Hist1h1b"      "Il2ra"         "Rpl39l"        "Cst3"         
##  [85] "H2-M2"         "Rsad2"         "Gm14275"       "Il10"         
##  [89] "Cstdc5"        "Ube2c"         "Zmynd15"       "Derl3"        
##  [93] "Cox6a2"        "Gzmb"          "Cma1"          "Serpina3g"    
##  [97] "Gata2"         "Krt79"         "Fbp1"          "Alox15"       
## [101] "Ctsk"          "Ckb"           "Akap12"        "Cstdc4"       
## [105] "Tff1"          "Syt7"          "Ctla2a"        "Gm26917"      
## [109] "Hspa1a"        "Gm13546"       "Mt2"           "Xcr1"         
## [113] "Snhg1"         "Iglv1"         "Top2a"         "Gzmc"         
## [117] "AA467197"      "Hist1h2ae"     "Snhg12"        "Scgb1a1"      
## [121] "Slpi"          "Cd207"         "Lcn2"          "Spata7"       
## [125] "Tbc1d4"        "Tnfrsf17"      "Hist1h2ap"     "Cd163l1"      
## [129] "Dcstamp"       "Hspa1b"        "Sept3"         "Edn1"         
## [133] "Cacnb3"        "Asic3"         "Eaf2"          "1500009L16Rik"
## [137] "Penk"          "Scin"          "Ear1"          "Klk4"         
## [141] "Pkib"          "Pclaf"         "Il17f"         "Hba-a1"       
## [145] "Hpgd"          "Car4"          "Ctla4"         "Igkv10-96"    
## [149] "Mki67"         "Mt3"           "Xcl1"          "Cbr2"         
## [153] "2410006H16Rik" "Cd79b"         "Timd4"         "Plet1"        
## [157] "Stfa2"         "Wfdc21"        "Fcer1a"        "Serpinb6b"    
## [161] "Pcsk1"         "Igfbp4"        "Hbb-bs"        "Fcrls"        
## [165] "Pcp4"          "Gzma"          "Nos2"          "H2-Eb1"       
## [169] "Igkv12-44"     "Wfdc2"         "Hba-a2"        "Tcf4"         
## [173] "Txndc5"        "Cenpf"         "Gm21762"       "Hamp"         
## [177] "Tnfrsf4"       "Hmgb2"         "Ppbp"          "Ccdc80"       
## [181] "Ebf1"          "Ifi44"         "Ccl5"          "Lpl"          
## [185] "Tnf"           "Rrm2"          "Rnase2a"       "Nusap1"       
## [189] "Ear2"          "Il1rl1"        "Cyp7b1"        "Ctsl"         
## [193] "Hs3st1"        "Zfas1"         "Ms4a4c"        "Cyp11a1"      
## [197] "Tcaf2"         "Cfb"           "Ear6"          "H2-Ab1"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes  and more 
            
DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AW|^BC|^Gm|^Hist|Rik$|-ps",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,
                                                  DIG, 
                                                  CC_gene) )

GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1 
## Positive:  S100a9, S100a8, Isg15, Acod1, Hdc, Ccl4, Ccrl2, Asprv1, Ifitm1, Rsad2 
##     Egr1, Clec4e, Ccl3, Isg20, Ifit1, Lcn2, Ifitm3, Cstdc4, Chil1, Retnlg 
##     Stfa2l1, Mrgpra2b, Cmpk2, Cxcl2, Ifit3b, Oasl2, Gbp2b, Ly6e, Ptgs2, Scgb1a1 
## Negative:  Anxa5, Ppp1r14b, Atp5g1, Tuba1b, Myof, Lpl, Chchd10, Prdx1, Mrc1, Slc7a2 
##     Rpn1, Vat1, Cisd1, Dbi, Sdf2l1, Nrp2, Atp6v0d2, Pdia6, Anxa4, Pdia4 
##     Axl, Ptma, Calr, Ranbp1, Plet1, Fabp5, S100a1, Ctss, Shtn1, Lgmn 
## PC_ 2 
## Positive:  Chil3, Ctsd, Ccl6, Lyz2, Ctsb, Ctss, Atp6v0d2, Slc7a2, Mrc1, F7 
##     Cybb, Clec4a3, Itgax, Ccl9, F10, Lgals3, Myof, Anxa4, Lrp1, Lpl 
##     Ear2, Fn1, Plet1, Tgm2, Wfdc17, Nrp2, Mgll, Cd68, Dab2, Rnase2a 
## Negative:  Gata3, Rora, Il2ra, Hlf, Ptprcap, Ctla2a, Pdcd1, Icos, Dut, Skap1 
##     Selenoh, Itgb7, Pclaf, Sh2d2a, Il1rl1, Lig1, Itk, Ar, Cxcr6, Smco4 
##     Nrip1, Ptpn13, Plod2, Dkk3, Ptpn22, Ikzf3, Nup210, Pmepa1, Ctla4, Ramp1 
## PC_ 3 
## Positive:  Ctsd, Chil3, Atp6v0d2, Mgll, Ctsk, Car4, Fpr2, Krt79, Ch25h, Slc7a2 
##     Bhlhe41, Abcg1, Kcnn3, Ms4a8a, Dst, Abcc5, Cfb, Plet1, F7, Trim29 
##     Sort1, Phgdh, Olr1, B3gnt7, Cisd1, Cidec, Il11ra1, Pld3, Abcd2, Emilin1 
## Negative:  Cd74, H2-Aa, H2-Ab1, H2-Eb1, H2-DMb1, Cbfa2t3, H2-DMa, Cd83, Aif1, Ms4a6c 
##     Ccr2, S100a4, Pld4, Cd86, Sdc4, Tmem176b, Ifi30, Napsa, Plbd1, Ms4a4c 
##     Syngr2, Mafb, Tmem176a, Rnase6, Ahr, Ciita, Cst3, Ccl24, Mgl2, Ece1 
## PC_ 4 
## Positive:  Ifitm3, S100a9, S100a8, Cxcl2, Isg15, Pclaf, Ccrl2, Acod1, Rsad2, Ccna2 
##     Spc24, Cst3, Wfdc17, Kif15, Hdc, Clec4e, Cenpm, Knl1, Ifit1, Isg20 
##     Asf1b, Asprv1, Dut, Egr1, Tk1, Clec4n, Ctsb, Hmgb3, Atf3, Plbd1 
## Negative:  Nkg7, Gzma, Ccl5, Irf8, Prf1, Klrd1, Serpinb9, Ms4a4b, Eomes, Gzmb 
##     Ncr1, Satb1, Serpinb6b, Atp1b1, Bcl2, Serpinb9b, Klri2, Klra3, Klrc1, Cma1 
##     Sh2d2a, Gramd3, Cd7, Ccnd2, Il18r1, Ptprcap, Cd28, Klra7, Lgals1, Kcnq1ot1 
## PC_ 5 
## Positive:  Ccl22, Aldh1a2, Cacnb3, Ccl17, Plppr4, Il4i1, Lad1, Fscn1, Cxcl16, Nudt17 
##     Ccr7, Ramp3, Socs2, Nr4a3, Tbc1d4, Tspan33, Palld, Mir155hg, Flrt3, Strip2 
##     Sema6d, Tspan3, Dcstamp, Zmynd15, Mfge8, Rora, Rgs1, Il7r, Bcl2a1d, Cxcr6 
## Negative:  Siglech, Klk1, Cox6a2, Ccr9, Spib, Bcl11a, Tcf4, Cadm1, Mef2c, Klk1b27 
##     Rnase6, Pacsin1, Kmo, Lifr, Smim5, Sh3bgr, Ly6c2, Pld4, Fcrla, Plac8 
##     Bst2, Cyb561a3, Ly6c1, Ly6d, Flt3, Tmem108, Ly6e, Pltp, Upb1, Pou2af1
length(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG,CC_gene)))
## [1] 1265
setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG,CC_gene))[1:300]
##   [1] "Ccl22"     "Retnla"    "Mfge8"     "Mucl1"     "Aldh1a2"   "Ccl7"     
##   [7] "Ccl8"      "Hmox1"     "Il13"      "Areg"      "Ngp"       "Ccl17"    
##  [13] "Ccl2"      "Il12b"     "Fscn1"     "Ly6d"      "C1qa"      "Camp"     
##  [19] "Spp1"      "Ltf"       "Pf4"       "Calca"     "Retnlg"    "Pou2af1"  
##  [25] "Ccl1"      "Cxcl10"    "Tpsab1"    "Tph1"      "Clu"       "Klk1"     
##  [31] "Cpa3"      "Rbp4"      "Cd209e"    "Arg1"      "Fkbp11"    "Apoe"     
##  [37] "Cd209d"    "Siglech"   "Ccr7"      "Cxcl3"     "Csf2"      "Timp1"    
##  [43] "Ccl24"     "Fabp4"     "Ms4a1"     "C1qc"      "Cxcl9"     "Marco"    
##  [49] "Il5"       "Mgl2"      "C1qb"      "Ly6a"      "Ldoc1"     "Cd209a"   
##  [55] "Il6"       "Gpnmb"     "Il4i1"     "Cxcl1"     "Inhba"     "Ccr9"     
##  [61] "Mmp12"     "Il17a"     "Cd79a"     "Ccl12"     "Lyz1"      "Mmp13"    
##  [67] "Mcpt8"     "Saa3"      "Ch25h"     "Il2ra"     "Cst3"      "H2-M2"    
##  [73] "Rsad2"     "Il10"      "Cstdc5"    "Zmynd15"   "Derl3"     "Cox6a2"   
##  [79] "Gzmb"      "Cma1"      "Serpina3g" "Gata2"     "Krt79"     "Fbp1"     
##  [85] "Alox15"    "Ctsk"      "Ckb"       "Akap12"    "Cstdc4"    "Tff1"     
##  [91] "Syt7"      "Ctla2a"    "Mt2"       "Xcr1"      "Snhg1"     "Gzmc"     
##  [97] "Snhg12"    "Scgb1a1"   "Slpi"      "Cd207"     "Lcn2"      "Spata7"   
## [103] "Tbc1d4"    "Tnfrsf17"  "Cd163l1"   "Dcstamp"   "Sept3"     "Edn1"     
## [109] "Cacnb3"    "Asic3"     "Eaf2"      "Penk"      "Scin"      "Ear1"     
## [115] "Klk4"      "Pkib"      "Pclaf"     "Il17f"     "Hpgd"      "Car4"     
## [121] "Ctla4"     "Mt3"       "Xcl1"      "Cbr2"      "Cd79b"     "Timd4"    
## [127] "Plet1"     "Stfa2"     "Wfdc21"    "Fcer1a"    "Serpinb6b" "Pcsk1"    
## [133] "Igfbp4"    "Fcrls"     "Pcp4"      "Gzma"      "Nos2"      "H2-Eb1"   
## [139] "Wfdc2"     "Tcf4"      "Txndc5"    "Hamp"      "Tnfrsf4"   "Ppbp"     
## [145] "Ccdc80"    "Ebf1"      "Ifi44"     "Ccl5"      "Lpl"       "Tnf"      
## [151] "Rnase2a"   "Ear2"      "Il1rl1"    "Cyp7b1"    "Ctsl"      "Hs3st1"   
## [157] "Zfas1"     "Ms4a4c"    "Cyp11a1"   "Tcaf2"     "Cfb"       "Ear6"     
## [163] "H2-Ab1"    "Cd36"      "Mt1"       "Klk1b11"   "Hlf"       "Cd40"     
## [169] "Bcl11a"    "Slc1a2"    "S100a1"    "Ifit1"     "Ly6g"      "Il4"      
## [175] "Fam71f2"   "Il1a"      "Fn1"       "Bex1"      "Serpinb2"  "Gata3"    
## [181] "Ifit2"     "Krt18"     "Hapln3"    "Ereg"      "Epcam"     "H2-Aa"    
## [187] "Fabp5"     "Mmp19"     "Dut"       "Nudt17"    "Thbs1"     "Igf1"     
## [193] "Ccl6"      "Slc7a2"    "Itgae"     "Ccn3"      "Atp6v0d2"  "Tnfsf8"   
## [199] "Ccl4"      "Il2"       "Chil4"     "Ly6c2"     "Lmo7"      "Bglap3"   
## [205] "Pdcd1"     "Mgll"      "Mcpt1"     "Mafb"      "F13a1"     "Calcb"    
## [211] "Ikzf2"     "Gas5"      "Ccl9"      "Hal"       "Plppr4"    "Apod"     
## [217] "Rgs6"      "Sdc4"      "Ifitm6"    "Mrc1"      "Cxcl16"    "Cd74"     
## [223] "Fpr2"      "Mmp10"     "Ramp3"     "Spib"      "Flrt3"     "H2-DMb1"  
## [229] "Car2"      "Mef2c"     "Prdx1"     "Gprin3"    "Prok2"     "Rrad"     
## [235] "Cybb"      "Prg2"      "Palld"     "Nppc"      "Olfm4"     "Krt19"    
## [241] "Ms4a8a"    "Msc"       "S100a8"    "Mmp25"     "Tspan3"    "Adig"     
## [247] "Ccl3"      "Asgr2"     "Nr4a3"     "Stab2"     "Rora"      "Creld2"   
## [253] "Nrg1"      "Selenoh"   "Bpifa1"    "Stfa2l1"   "Rnase6"    "Ocstamp"  
## [259] "Prc1"      "Ctsg"      "Pla2g2d"   "Cxcl5"     "Nupr1"     "Hilpda"   
## [265] "Dnase1l3"  "Cd83"      "Wfdc17"    "Fggy"      "Krt8"      "Gatm"     
## [271] "Adamts9"   "Egr4"      "Ccna2"     "Rgcc"      "Tmem176a"  "Olr1"     
## [277] "Cxcr6"     "Cldn1"     "Cdh1"      "Hes1"      "Olfr889"   "Ptcra"    
## [283] "Tnfrsf9"   "Tnfsf11"   "Scd1"      "Lipa"      "Cbfa2t3"   "Tgm2"     
## [289] "Bank1"     "Timp3"     "Fxyd3"     "Vcan"      "Cmpk2"     "Lad1"     
## [295] "Aif1"      "Tm4sf5"    "Lipc"      "Plac8"     "Icos"      "Clec10a"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:32

GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph', resolution = 2.8)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 21447
## Number of edges: 889971
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7962
## Number of communities: 39
## Elapsed time: 4 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 133)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:43:51 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:43:51 Read 21447 rows and found 32 numeric columns
## 11:43:51 Using Annoy for neighbor search, n_neighbors = 20
## 11:43:51 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:43:54 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpSK3T0e\filead98213e264d
## 11:43:54 Searching Annoy index using 1 thread, search_k = 2000
## 11:43:59 Annoy recall = 100%
## 11:44:00 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 11:44:01 Initializing from normalized Laplacian + noise (using irlba)
## 11:44:02 Commencing optimization for 200 epochs, with 651750 positive edges
## 11:44:24 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))

GEX.seur$cnt <- factor(gsub("1$|2$|3$|4$|5$","",as.character(GEX.seur$FB.info)),
                       levels = c("LUNG.CTL","LUNG.CKO",
                                  "BALF.CTL","BALF.CKO"))
DimPlot(GEX.seur, reduction = "umap", label = F,group.by = "FB.info", split.by = "FB.info", ncol = 4,
        cols = color.FB) 

DimPlot(GEX.seur, reduction = "umap", label = F,group.by = "cnt", split.by = "cnt", ncol = 2,
        cols = color.cnt) 

DoubletFinder

library(DoubletFinder)
## 载入需要的程辑包:KernSmooth
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## 载入需要的程辑包:ROCR

## NULL
## [1] "Creating 7149 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Creating 7149 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.05") +
  DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1")

check some markers

# directly using prevously sorted markers
markers.preAnno <- list(CC=c("Pcna","Top2a","Mcm6","Mki67","Ptprc"),
                        Bcell=c("Cd19","Cd79a","Ms4a1","Ighd"),
                        Plasma=c("Ighm","Mzb1","Jchain","Sdc1"),
                        CD8T=c("Cd3d","Cd3e","Cd8a","Cd8b1","Sell"),
                        Treg=c("Foxp3"),
                        gdT=c("Trac","Tcrg-C1"),
                        NKs=c("Klrb1c","Ncr1","Eomes","Klrd1","Klre1","Prf1","Gzma","Gzmb","Ccl5","Nkg7","Serpinb6b","Serpinb9"),
                        #ILC2_CC=c(""),
                        ILC2=c("Gata3","Il1rl1","Ramp1","Areg","Calca","Stab2","Il5","Hilpda"),
                        BAS=c("Il6","Gata2","Cpa3","Ms4a2","Mcpt8","Cd200r3","Cyp11a1"),
                        EOS=c("Siglecf","Ear6","Ccr3","Ldhc"),
                        NEU1=c("Retnlg","Mmp8","Hp"),
                        NEU2=c("Cxcl3","Il1a","Tnf"),
                        NEU3=c("Cstb","Ccl4"),
                        #NEU4=c(""),
                        NEU5=c("Ifit1","Isg20","Stat1"),
                        pDC=c("Siglech","Tcf4","Ccr9","Klk1","Cox6a2"),
                        cDCs=c("Cd209a","Cd209d","Cd209e","Itgae","H2-Ab1","H2-Aa","H2-Eb1"),
                        migDC=c("Ccr7","Fscn1","Ccl22","Ccl17","Aldh1a2","Pkib","Il4i1","Batf3"),
                        Monocyte=c("Clec4a1","Plac8","Lyz2","S100a4","Cx3cr1","Ace","Adgre4","Ifitm6","F13a1","Ly6c1","Ly6c2"),
                        IM1=c("Arg1","Anpep","Lgals3","Sgk1","Spp1","Mmp19","Ccl7","Ccl2","Hal","Gpnmb"),
                        IM2=c("Retnla","Ccl24","Ece1"),
                        AM_CC=c("S100a1","Abcg1","Mgll","Chil3","Lpl","Lipa","Vat1","F7","Anxa4"),
                        AM1=c("Mt2","Cxcl1","Inhba","Cd14")
                        #AM2=c("")
                        )
as.vector(unlist(markers.preAnno))
##   [1] "Pcna"      "Top2a"     "Mcm6"      "Mki67"     "Ptprc"     "Cd19"     
##   [7] "Cd79a"     "Ms4a1"     "Ighd"      "Ighm"      "Mzb1"      "Jchain"   
##  [13] "Sdc1"      "Cd3d"      "Cd3e"      "Cd8a"      "Cd8b1"     "Sell"     
##  [19] "Foxp3"     "Trac"      "Tcrg-C1"   "Klrb1c"    "Ncr1"      "Eomes"    
##  [25] "Klrd1"     "Klre1"     "Prf1"      "Gzma"      "Gzmb"      "Ccl5"     
##  [31] "Nkg7"      "Serpinb6b" "Serpinb9"  "Gata3"     "Il1rl1"    "Ramp1"    
##  [37] "Areg"      "Calca"     "Stab2"     "Il5"       "Hilpda"    "Il6"      
##  [43] "Gata2"     "Cpa3"      "Ms4a2"     "Mcpt8"     "Cd200r3"   "Cyp11a1"  
##  [49] "Siglecf"   "Ear6"      "Ccr3"      "Ldhc"      "Retnlg"    "Mmp8"     
##  [55] "Hp"        "Cxcl3"     "Il1a"      "Tnf"       "Cstb"      "Ccl4"     
##  [61] "Ifit1"     "Isg20"     "Stat1"     "Siglech"   "Tcf4"      "Ccr9"     
##  [67] "Klk1"      "Cox6a2"    "Cd209a"    "Cd209d"    "Cd209e"    "Itgae"    
##  [73] "H2-Ab1"    "H2-Aa"     "H2-Eb1"    "Ccr7"      "Fscn1"     "Ccl22"    
##  [79] "Ccl17"     "Aldh1a2"   "Pkib"      "Il4i1"     "Batf3"     "Clec4a1"  
##  [85] "Plac8"     "Lyz2"      "S100a4"    "Cx3cr1"    "Ace"       "Adgre4"   
##  [91] "Ifitm6"    "F13a1"     "Ly6c1"     "Ly6c2"     "Arg1"      "Anpep"    
##  [97] "Lgals3"    "Sgk1"      "Spp1"      "Mmp19"     "Ccl7"      "Ccl2"     
## [103] "Hal"       "Gpnmb"     "Retnla"    "Ccl24"     "Ece1"      "S100a1"   
## [109] "Abcg1"     "Mgll"      "Chil3"     "Lpl"       "Lipa"      "Vat1"     
## [115] "F7"        "Anxa4"     "Mt2"       "Cxcl1"     "Inhba"     "Cd14"
DotPlot(GEX.seur, features = rev(as.vector(unlist(markers.preAnno))[1:60]), group.by = "seurat_clusters")  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(as.vector(unlist(markers.preAnno))[61:120]), group.by = "seurat_clusters")  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

sort_clusters

GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
                                 levels = c(34,      # Bcell
                                            35,         # Plasma
                                            20,21,32,27,     # Tcell  
                                            19,0,    # NKs
                                            38,    # CC mix
                                            9,16,33,    #  ILC2
                                            37,      # BAS
                                            14,      # EOS
                                            13,1,11,12,17,6,4,3,5,2,8,31,    # NEU
                                            29,      # pDC
                                            30,    # cDCs
                                            36,    # CC mix
                                            26,    # migDC
                                            24,10,    # Monocyte
                                            23,18,15,   # IMs
                                            25,7,22,28    # AMs
                                            
                                            
                                            ))
DotPlot(GEX.seur, features = rev(as.vector(unlist(markers.preAnno))[1:60]), group.by = "sort_clusters")  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(as.vector(unlist(markers.preAnno))[61:120]), group.by = "sort_clusters")  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0, group.by = "sort_clusters") &
  geom_jitter(alpha=0.15, shape=16, width = 0.3 ,size = 0.12)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0, group.by = "sort_clusters")

find markers

# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(GEX.seur) <- "sort_clusters"

#GEX.markers.pre <- FindAllMarkers(GEX.seur, only.pos = TRUE, min.pct = 0.05,
#                                  test.use = "MAST",
#                                  logfc.threshold = 0.25)
GEX.markers.pre <- read.table("sc10x_RZB.markers.pre20240408.csv", header = TRUE, sep = ",")
#GEX.markers.pre %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC)
GEX.markers.pre$cluster <- factor(as.character(GEX.markers.pre$cluster),
                          levels = levels(GEX.seur$sort_clusters))

markers.pre_t60 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  filter(pct.1>0.05 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
                   top_n(n = 60, wt = avg_log2FC) %>%
                    filter(p_val_adj < 0.01) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene

markers.pre_t120 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  filter(pct.1>0.05 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
                   top_n(n = 120, wt = avg_log2FC) %>%
                    filter(p_val_adj < 0.01) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene
ttt = 1159
ttt/60
## [1] 19.31667
ttt/64
## [1] 18.10938
ttt/65
## [1] 17.83077
ttt = 1977
ttt/60
## [1] 32.95
ttt/64
## [1] 30.89062
ttt/65
## [1] 30.41538
pp.t60 <- list()

for(i in 1:18){
pp.t60[[i]] <- DotPlot(GEX.seur, features = rev(markers.pre_t60[(65*i-64):(65*i)]))  + coord_flip() + 
           theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
}

pp.t60
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

pp.t120 <- list()

for(i in 1:33){
pp.t120[[i]] <- DotPlot(GEX.seur, features = rev(markers.pre_t120[(60*i-59):(60*i)]))  + coord_flip() + 
           theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
}

#pp.t120
#marker.GSE127465 <- list(Mono1=c("Vcan","Ly6c1","Ly6c2"),
#                         Mono2=c("Cd300e","Itgal","Ace","Fcgr4"),
#                         Mono3=c("Il1b","S100a9","S100a8"),
#                         )

marker.GSE12746 <- list(NK=c("Gzma","Klrb1c","Ncr1","Klra4","Eomes","Gzmb","Fasl"),
                        Tcell=c("Cd3d","Cd3e","Cd3g","Cd28"),
                        Bcell=c("Cd79a","Fcmr","Cd79b","Cd19","Fcer2a","Pax5","Cd22"),
                        Basophil=c("Il6","Gata2","Cpa3","Ms4a2","Fcer1a"),
                        Neutrophil=c("S100a9","S100a8","Mmp9","Csf3r","Mmp8","Il1rn","Cxcr2"),
                        Mono1=c("Vcan","Ly6c1","Ly6c2"),
                        Mono2=c("Cd300e","Itgal","Ace","Fcgr4"),
                        Mono3=c("Il1b","S100a9","S100a8","Csf1r","Cd14"),
                        #MoMDC=c("Lyz1","Csf1r","Cd300e","Mafb","Batf3","Krt79","Msr1"),
                        mpDC=c("Siglech","Ccr9","Bst2","Pacsin1","Tcf4","Bcl11a","Runx2"),
                        mDC1=c("Xcr1","Cadm1","Cd36","Itgae","Cd24a"),
                        mDC2=c("Itgax","H2-DMb2","Csf1r"),
                        mDC3=c("Fscn1","Ccr7","Ccl22","Tnfrsf9","Sema7a","Stat4","Il12b")
                        )



DotPlot(GEX.seur, features = rev(unique(as.vector(unlist(marker.GSE12746)))), group.by = "sort_clusters")  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

ref markers

check markers from Gibbings2017 which’s fig.2 defined signatures of AM and IM1-3

markers.Gibbings2017 <- list(Mac.genes=c("Mertk","Fcgr1","Adgre1","Cd68","Lyz2","Lamp1","Cd36"),   # Emr1: Adgre1
                             DC.genes=c("Ccr7","Dpp4","Zbtb46","Batf3","Irf4","Itgam","Itgax"),  # 4 more added, Batf3, Irf4, CD11b, CD11c
                             Mono.genes=c("Mafb","Maf","Cx3cr1","Itgam","Cd14","Cd163","Csf1r","Fcgr3"),
                             AM.genes=c("Marco","Siglecf","Car4","Csf2ra","Csf2rb"),
                             M1.genes=c("H2-Ab1","H2-Aa","Cd86","Tlr2","Tlr4","Socs3","Tnf","Il1b","Il18"),
                             M2.genes=c("Chil3","Chil4","Tgm2","Retnla","Mrc1","Il10"),   # no Clec7a expressed 
                             Inflam=c("Lyve1","C1qb","C1qa","C1qc","C4b","Fcna","Colec12","Ifnar1","Ifnar2","Ifngr1","Ifngr2",
                                      "Il4ra","Il6ra","Il10ra","Il10rb","Il21r","Il12rb2","Il17ra"),
                             Chemo.lr=c("Ccl12","Ccl2","Ccl24","Ccl3","Ccl4","Ccl6","Ccl7","Ccl8","Ccl9","Cxcl13","Cxcl14","Cxcl16","Cxcl2","Ccr1","Ccr2","Ccr5")
                             )
DotPlot(GEX.seur, features = rev(c(markers.Gibbings2017$Mac.genes,markers.Gibbings2017$DC.genes)), scale = FALSE)  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(c(markers.Gibbings2017$Mono.genes,markers.Gibbings2017$AM.genes)), scale = TRUE)  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(c(markers.Gibbings2017$M1.genes,markers.Gibbings2017$M2.genes)), scale = TRUE)  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(c(markers.Gibbings2017$Inflam)), scale = TRUE)  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(c(markers.Gibbings2017$Chemo.lr)), scale = TRUE)  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

cell composition

GEX.seur$SP.info <- factor(as.character(GEX.seur$FB.info),
                           levels = levels(GEX.seur$FB.info)[3:10])
stat.SP <- table(GEX.seur@meta.data[,c("SP.info","sort_clusters")])
stat.SP
##            sort_clusters
## SP.info      34  35  20  21  32  27  19   0  38   9  16  33  37  14  13   1  11
##   LUNG.CTL1  28  16  44  43  24  26  94 415  12 268 175  15   9  49 120 295  79
##   LUNG.CTL2  34  34  60  40  45  12  82 383  10 179 108  14  13  67 157 241 123
##   LUNG.CKO1  39  43 140 130  34  46  89 309  18 100  96   3   7  55 157 311 160
##   LUNG.CKO2  34  23 182  67  49  22  91 382  19 101 128   6  12  70 141 306 151
##   BALF.CTL1   2   0   7  44  16  48  39  14   1  53   8  74   4  43  10  31  39
##   BALF.CTL2   0   0   1  35  16  11  40   8   1  36   6  40   5 120  14  54  27
##   BALF.CKO1   1   0   3  33   6 121  32   8   1  11   2  10   4  41  14  24  50
##   BALF.CKO2   1   0   1  26  13  14  27   7   1  28   7  25   9  96  26  56  59
##            sort_clusters
## SP.info      12  17   6   4   3   5   2   8  31  29  30  36  26  24  10  23  18
##   LUNG.CTL1  17  45  12   1  11   7  14   8   3  58  58  29  57  63 127 100 110
##   LUNG.CTL2  11  61  11   9  16  10   8  13   2  64  70  20  29  74 181  97 104
##   LUNG.CKO1  17 139  17  13  19  25  17  27   5  44  46  10  31  95 184  49 137
##   LUNG.CKO2  29  63  19   6   7  10   5  20   3  89  64  15  35 114 222  81 106
##   BALF.CTL1 100  43 189 293 229 256 309 196  45  12   1   1  68   0   8  19  10
##   BALF.CTL2 183  30 231 229 289 202 277 160  42  10   3   1  36   0   4  10  12
##   BALF.CKO1 126  89 208 320 360 350 422 233  82   9   2   0  17   1   0   5  10
##   BALF.CKO2 158  28 295 241 253 192 192 144  40   4   2   2  28   0   4  14   9
##            sort_clusters
## SP.info      15  25   7  22  28
##   LUNG.CTL1  58  43  28 146  43
##   LUNG.CTL2  51  24  16  84   6
##   LUNG.CKO1  55  26  10  86  18
##   LUNG.CKO2  52  18   5  86  17
##   BALF.CTL1  82  95 222   1  94
##   BALF.CTL2  68  32 129   2   9
##   BALF.CKO1  61  32 164   0  50
##   BALF.CKO2 104  45 284   1  60
round(rowRatio(stat.SP),3)
##            sort_clusters
## SP.info        34    35    20    21    32    27    19     0    38     9    16
##   LUNG.CTL1 0.010 0.006 0.016 0.016 0.009 0.009 0.034 0.151 0.004 0.097 0.064
##   LUNG.CTL2 0.013 0.013 0.023 0.016 0.018 0.005 0.032 0.149 0.004 0.070 0.042
##   LUNG.CKO1 0.014 0.015 0.050 0.046 0.012 0.016 0.032 0.110 0.006 0.036 0.034
##   LUNG.CKO2 0.012 0.008 0.064 0.024 0.017 0.008 0.032 0.134 0.007 0.035 0.045
##   BALF.CTL1 0.001 0.000 0.003 0.016 0.006 0.018 0.014 0.005 0.000 0.020 0.003
##   BALF.CTL2 0.000 0.000 0.000 0.015 0.007 0.005 0.017 0.003 0.000 0.015 0.003
##   BALF.CKO1 0.000 0.000 0.001 0.011 0.002 0.042 0.011 0.003 0.000 0.004 0.001
##   BALF.CKO2 0.000 0.000 0.000 0.010 0.005 0.006 0.011 0.003 0.000 0.011 0.003
##            sort_clusters
## SP.info        33    37    14    13     1    11    12    17     6     4     3
##   LUNG.CTL1 0.005 0.003 0.018 0.044 0.107 0.029 0.006 0.016 0.004 0.000 0.004
##   LUNG.CTL2 0.005 0.005 0.026 0.061 0.094 0.048 0.004 0.024 0.004 0.004 0.006
##   LUNG.CKO1 0.001 0.002 0.020 0.056 0.111 0.057 0.006 0.050 0.006 0.005 0.007
##   LUNG.CKO2 0.002 0.004 0.025 0.049 0.107 0.053 0.010 0.022 0.007 0.002 0.002
##   BALF.CTL1 0.027 0.001 0.016 0.004 0.011 0.014 0.037 0.016 0.070 0.108 0.085
##   BALF.CTL2 0.017 0.002 0.051 0.006 0.023 0.011 0.077 0.013 0.097 0.097 0.122
##   BALF.CKO1 0.003 0.001 0.014 0.005 0.008 0.017 0.043 0.031 0.072 0.110 0.124
##   BALF.CKO2 0.010 0.004 0.038 0.010 0.022 0.024 0.063 0.011 0.118 0.097 0.101
##            sort_clusters
## SP.info         5     2     8    31    29    30    36    26    24    10    23
##   LUNG.CTL1 0.003 0.005 0.003 0.001 0.021 0.021 0.011 0.021 0.023 0.046 0.036
##   LUNG.CTL2 0.004 0.003 0.005 0.001 0.025 0.027 0.008 0.011 0.029 0.071 0.038
##   LUNG.CKO1 0.009 0.006 0.010 0.002 0.016 0.016 0.004 0.011 0.034 0.066 0.017
##   LUNG.CKO2 0.004 0.002 0.007 0.001 0.031 0.022 0.005 0.012 0.040 0.078 0.028
##   BALF.CTL1 0.095 0.114 0.072 0.017 0.004 0.000 0.000 0.025 0.000 0.003 0.007
##   BALF.CTL2 0.085 0.117 0.067 0.018 0.004 0.001 0.000 0.015 0.000 0.002 0.004
##   BALF.CKO1 0.121 0.145 0.080 0.028 0.003 0.001 0.000 0.006 0.000 0.000 0.002
##   BALF.CKO2 0.077 0.077 0.058 0.016 0.002 0.001 0.001 0.011 0.000 0.002 0.006
##            sort_clusters
## SP.info        18    15    25     7    22    28
##   LUNG.CTL1 0.040 0.021 0.016 0.010 0.053 0.016
##   LUNG.CTL2 0.041 0.020 0.009 0.006 0.033 0.002
##   LUNG.CKO1 0.049 0.020 0.009 0.004 0.031 0.006
##   LUNG.CKO2 0.037 0.018 0.006 0.002 0.030 0.006
##   BALF.CTL1 0.004 0.030 0.035 0.082 0.000 0.035
##   BALF.CTL2 0.005 0.029 0.013 0.054 0.001 0.004
##   BALF.CKO1 0.003 0.021 0.011 0.057 0.000 0.017
##   BALF.CKO2 0.004 0.042 0.018 0.114 0.000 0.024
melt.SP <- reshape2::melt(stat.SP)
melt.SP$sort_clusters <- factor(as.character(melt.SP$sort_clusters),
                                levels = rev(levels(GEX.seur$sort_clusters)))

color.clusters <- scales::hue_pal()(39)
names(color.clusters) <- as.character(0:38)

p.SP <- ggplot(melt.SP, aes(SP.info, weight=value, fill=sort_clusters)) +
  geom_bar(color="black", width = .7, position = "fill") +
  labs( y = "Proportion of total (%)",title = "",x="") +
  scale_fill_manual(values = color.clusters[rev(levels(GEX.seur$sort_clusters))]) +
  scale_y_continuous(expand = c(0,0), n.breaks = 6, labels = c(0,20,40,60,80,100)) + 
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 13.6),
        axis.text.y = element_text(size = 16))+
  guides(fill = guide_legend(reverse = T, title = ""))
p.SP

preAnno

GEX.seur$preAnno1 <- as.character(GEX.seur$seurat_clusters)

# Bcell
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(34)] <- "Bcell"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(35)] <- "Plasma"

# Tcell
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(20)] <- "CD8T"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(21)] <- "CD4T"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(32)] <- "Treg"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(27)] <- "Tin"    #  innate-like T

# NK
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(19)] <- "NK1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(0)] <- "NK2"

# ILC2
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(9)] <- "ILC2.CC"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(16)] <- "ILC2.1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(33)] <- "ILC2.2"

# Basophil
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(37)] <- "BAS"

# Eosinophil
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(14)] <- "EOS"

# Neutrophil
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(13,1)] <- "NEU1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(11,12)] <- "NEU2"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(6,4,3)] <- "NEU3"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(5,17)] <- "NEU4"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(2,8,31)] <- "NEU5"

# Siglech+
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(29)] <- "pDC"

# 
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(30)] <- "cDC"

# Ccr7+ Fscn1+
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(26)] <- "migDC"

# Monocyte, half MHCII+ half Ly6c+
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(24)] <- "Mono1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(10)] <- "Mono2"

# Interstitial Macrophages
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(23)] <- "IM1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(18)] <- "IM2"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(15)] <- "IM3"

# Alveolar Macrophages
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(25)] <- "AM.CC"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(7)] <- "AM1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(22)] <- "AM2"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(28)] <- "AM3"


# cyclijng mix
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(38)] <- "CC.mix1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(36)] <- "CC.mix2"

GEX.seur$preAnno1 <- factor(GEX.seur$preAnno1,
                            levels = c("Bcell","Plasma",
                                       "CD8T","CD4T","Treg","Tin",
                                       "NK1","NK2",
                                       "ILC2.CC","ILC2.1","ILC2.2",
                                       "BAS","EOS",
                                       paste0("NEU",1:5),
                                       "pDC","cDC","migDC",
                                       "Mono1","Mono2","IM1","IM2","IM3",
                                       "AM.CC","AM1","AM2","AM3",
                                       "CC.mix1","CC.mix2"))
GEX.seur$preAnno2 <- as.character(GEX.seur$seurat_clusters)

# Bcell
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(34,35)] <- "Bcell"

# Tcell
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(20,21,32,27)] <- "Tcell"

# NK
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(19,0)] <- "NK"

# ILC2
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(9,16,33)] <- "ILC2"

# Basophil
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(37)] <- "BAS"

# Eosinophil
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(14)] <- "EOS"

# Neutrophil
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(13,1,11,12,6,4,3,5,17,2,8,31)] <- "NEU"

# Siglech+
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(29)] <- "pDC"

# 
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(30)] <- "cDC"

# Ccr7+ Fscn1+
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(26)] <- "migDC"

# Monocyte, half MHCII+ half Ly6c+
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(24,10)] <- "Monocyte"

# Interstitial Macrophages
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(23,18,15)] <- "IM"

# Alveolar Macrophages
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(25,7,22,28)] <- "AM"

GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(38,36)] <- "MIX"


GEX.seur$preAnno2 <- factor(GEX.seur$preAnno2,
                            levels = c("Bcell","Tcell",
                                       "NK",
                                       "ILC2",
                                       "BAS","EOS",
                                       "NEU",
                                       "pDC","cDC","migDC",
                                       "Monocyte","IM",
                                       "AM","MIX"))
ggsci::pal_igv("default")(40)
##  [1] "#5050FFFF" "#CE3D32FF" "#749B58FF" "#F0E685FF" "#466983FF" "#BA6338FF"
##  [7] "#5DB1DDFF" "#802268FF" "#6BD76BFF" "#D595A7FF" "#924822FF" "#837B8DFF"
## [13] "#C75127FF" "#D58F5CFF" "#7A65A5FF" "#E4AF69FF" "#3B1B53FF" "#CDDEB7FF"
## [19] "#612A79FF" "#AE1F63FF" "#E7C76FFF" "#5A655EFF" "#CC9900FF" "#99CC00FF"
## [25] "#A9A9A9FF" "#CC9900FF" "#99CC00FF" "#33CC00FF" "#00CC33FF" "#00CC99FF"
## [31] "#0099CCFF" "#0A47FFFF" "#4775FFFF" "#FFC20AFF" "#FFD147FF" "#990033FF"
## [37] "#991A00FF" "#996600FF" "#809900FF" "#339900FF"
scales::show_col(ggsci::pal_igv("default")(40))

color.pre1 <- c(scales::hue_pal()(32),"grey","darkgrey")
color.pre2 <- c(ggsci::pal_igv("default")(40)[c(2,3,6,7,8,10,12,22,18:19,38,21,30)],"darkgrey")
scales::show_col(color.pre1)

scales::show_col(color.pre2)

(DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno1", label = T, label.size = 3.5,repel = T,
          cols =color.pre1) + NoLegend()) +
  (DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno2", label = T, label.size = 3.5,repel = T,
          cols =color.pre2) + NoLegend())

DotPlot(GEX.seur, features = rev(as.vector(unlist(markers.preAnno))[1:60]), group.by = "preAnno1")  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(as.vector(unlist(markers.preAnno))[61:120]), group.by = "preAnno1")  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DimPlot(GEX.seur, reduction = "umap", label = F,group.by = "preAnno1", split.by = "cnt", ncol = 2,
        cols = color.pre1) 

DimPlot(GEX.seur, reduction = "umap", label = F,group.by = "preAnno2", split.by = "cnt", ncol = 2,
        cols = color.pre2) 

#saveRDS(GEX.seur,"sc10x_RZB.preAnno.rds")

check

GEX.seur
## An object of class Seurat 
## 18826 features across 21447 samples within 1 assay 
## Active assay: RNA (18826 features, 1265 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
GEX.pure <- subset(GEX.seur, subset = preAnno2 != "MIX")
GEX.pure
## An object of class Seurat 
## 18826 features across 21306 samples within 1 assay 
## Active assay: RNA (18826 features, 1265 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
GEX.pure$preAnno1 <- factor(as.character(GEX.pure$preAnno1),
                            levels = c("Bcell","Plasma",
                                       "CD8T","CD4T","Treg","Tin",
                                       "NK1","NK2",
                                       "ILC2.CC","ILC2.1","ILC2.2",
                                       "BAS","EOS",
                                       paste0("NEU",1:5),
                                       "pDC","cDC","migDC",
                                       "Mono1","Mono2","IM1","IM2","IM3",
                                       "AM.CC","AM1","AM2","AM3"))

GEX.pure$preAnno2 <- factor(as.character(GEX.pure$preAnno2),
                            levels = c("Bcell","Tcell",
                                       "NK",
                                       "ILC2",
                                       "BAS","EOS",
                                       "NEU",
                                       "pDC","cDC","migDC",
                                       "Monocyte","IM",
                                       "AM"))
DimPlot(GEX.pure, reduction = "umap", label = F,group.by = "preAnno2", split.by = "cnt", ncol = 2,
        cols = color.pre2) 

cell composition

preAnno1

stat.SP <- table(GEX.pure@meta.data[,c("SP.info","preAnno1")])
#rownames(stat.SP)[4] <- c("M22+")
stat.SP
##            preAnno1
## SP.info     Bcell Plasma CD8T CD4T Treg Tin NK1 NK2 ILC2.CC ILC2.1 ILC2.2 BAS
##   LUNG.CTL1    28     16   44   43   24  26  94 415     268    175     15   9
##   LUNG.CTL2    34     34   60   40   45  12  82 383     179    108     14  13
##   LUNG.CKO1    39     43  140  130   34  46  89 309     100     96      3   7
##   LUNG.CKO2    34     23  182   67   49  22  91 382     101    128      6  12
##   BALF.CTL1     2      0    7   44   16  48  39  14      53      8     74   4
##   BALF.CTL2     0      0    1   35   16  11  40   8      36      6     40   5
##   BALF.CKO1     1      0    3   33    6 121  32   8      11      2     10   4
##   BALF.CKO2     1      0    1   26   13  14  27   7      28      7     25   9
##            preAnno1
## SP.info     EOS NEU1 NEU2 NEU3 NEU4 NEU5 pDC cDC migDC Mono1 Mono2 IM1 IM2 IM3
##   LUNG.CTL1  49  415   96   24   52   25  58  58    57    63   127 100 110  58
##   LUNG.CTL2  67  398  134   36   71   23  64  70    29    74   181  97 104  51
##   LUNG.CKO1  55  468  177   49  164   49  44  46    31    95   184  49 137  55
##   LUNG.CKO2  70  447  180   32   73   28  89  64    35   114   222  81 106  52
##   BALF.CTL1  43   41  139  711  299  550  12   1    68     0     8  19  10  82
##   BALF.CTL2 120   68  210  749  232  479  10   3    36     0     4  10  12  68
##   BALF.CKO1  41   38  176  888  439  737   9   2    17     1     0   5  10  61
##   BALF.CKO2  96   82  217  789  220  376   4   2    28     0     4  14   9 104
##            preAnno1
## SP.info     AM.CC AM1 AM2 AM3
##   LUNG.CTL1    43  28 146  43
##   LUNG.CTL2    24  16  84   6
##   LUNG.CKO1    26  10  86  18
##   LUNG.CKO2    18   5  86  17
##   BALF.CTL1    95 222   1  94
##   BALF.CTL2    32 129   2   9
##   BALF.CKO1    32 164   0  50
##   BALF.CKO2    45 284   1  60
round(rowRatio(stat.SP),3)
##            preAnno1
## SP.info     Bcell Plasma  CD8T  CD4T  Treg   Tin   NK1   NK2 ILC2.CC ILC2.1
##   LUNG.CTL1 0.010  0.006 0.016 0.016 0.009 0.010 0.035 0.153   0.099  0.065
##   LUNG.CTL2 0.013  0.013 0.024 0.016 0.018 0.005 0.032 0.151   0.071  0.043
##   LUNG.CKO1 0.014  0.015 0.050 0.047 0.012 0.017 0.032 0.111   0.036  0.035
##   LUNG.CKO2 0.012  0.008 0.065 0.024 0.017 0.008 0.032 0.136   0.036  0.045
##   BALF.CTL1 0.001  0.000 0.003 0.016 0.006 0.018 0.014 0.005   0.020  0.003
##   BALF.CTL2 0.000  0.000 0.000 0.015 0.007 0.005 0.017 0.003   0.015  0.003
##   BALF.CKO1 0.000  0.000 0.001 0.011 0.002 0.042 0.011 0.003   0.004  0.001
##   BALF.CKO2 0.000  0.000 0.000 0.010 0.005 0.006 0.011 0.003   0.011  0.003
##            preAnno1
## SP.info     ILC2.2   BAS   EOS  NEU1  NEU2  NEU3  NEU4  NEU5   pDC   cDC migDC
##   LUNG.CTL1  0.006 0.003 0.018 0.153 0.035 0.009 0.019 0.009 0.021 0.021 0.021
##   LUNG.CTL2  0.006 0.005 0.026 0.157 0.053 0.014 0.028 0.009 0.025 0.028 0.011
##   LUNG.CKO1  0.001 0.003 0.020 0.168 0.064 0.018 0.059 0.018 0.016 0.017 0.011
##   LUNG.CKO2  0.002 0.004 0.025 0.159 0.064 0.011 0.026 0.010 0.032 0.023 0.012
##   BALF.CTL1  0.027 0.001 0.016 0.015 0.051 0.263 0.111 0.203 0.004 0.000 0.025
##   BALF.CTL2  0.017 0.002 0.051 0.029 0.089 0.316 0.098 0.202 0.004 0.001 0.015
##   BALF.CKO1  0.003 0.001 0.014 0.013 0.061 0.306 0.151 0.254 0.003 0.001 0.006
##   BALF.CKO2  0.010 0.004 0.039 0.033 0.087 0.316 0.088 0.151 0.002 0.001 0.011
##            preAnno1
## SP.info     Mono1 Mono2   IM1   IM2   IM3 AM.CC   AM1   AM2   AM3
##   LUNG.CTL1 0.023 0.047 0.037 0.041 0.021 0.016 0.010 0.054 0.016
##   LUNG.CTL2 0.029 0.071 0.038 0.041 0.020 0.009 0.006 0.033 0.002
##   LUNG.CKO1 0.034 0.066 0.018 0.049 0.020 0.009 0.004 0.031 0.006
##   LUNG.CKO2 0.040 0.079 0.029 0.038 0.018 0.006 0.002 0.031 0.006
##   BALF.CTL1 0.000 0.003 0.007 0.004 0.030 0.035 0.082 0.000 0.035
##   BALF.CTL2 0.000 0.002 0.004 0.005 0.029 0.013 0.054 0.001 0.004
##   BALF.CKO1 0.000 0.000 0.002 0.003 0.021 0.011 0.057 0.000 0.017
##   BALF.CKO2 0.000 0.002 0.006 0.004 0.042 0.018 0.114 0.000 0.024
melt.SP <- reshape2::melt(stat.SP)
melt.SP$preAnno1 <- factor(as.character(melt.SP$preAnno1),
                                levels = rev(levels(GEX.pure$preAnno1)))

color.stat1 <- color.pre1
names(color.stat1) <- levels(GEX.pure$preAnno1)

p.SP <- ggplot(melt.SP, aes(SP.info, weight=value, fill=preAnno1)) +
  geom_bar(color="black", width = .7, position = "fill") +
  labs( y = "Proportion of total (%)",title = "",x="") +
  scale_fill_manual(values = color.stat1[rev(levels(GEX.pure$preAnno1))]) +
  scale_y_continuous(expand = c(0,0), n.breaks = 6, labels = c(0,20,40,60,80,100)) + 
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 13.6),
        axis.text.y = element_text(size = 16))+
  guides(fill = guide_legend(reverse = T, title = ""))
p.SP

preAnno2

stat.SP <- table(GEX.pure@meta.data[,c("SP.info","preAnno2")])
#rownames(stat.SP)[4] <- c("M22+")
stat.SP
##            preAnno2
## SP.info     Bcell Tcell   NK ILC2  BAS  EOS  NEU  pDC  cDC migDC Monocyte   IM
##   LUNG.CTL1    44   137  509  458    9   49  612   58   58    57      190  268
##   LUNG.CTL2    68   157  465  301   13   67  662   64   70    29      255  252
##   LUNG.CKO1    82   350  398  199    7   55  907   44   46    31      279  241
##   LUNG.CKO2    57   320  473  235   12   70  760   89   64    35      336  239
##   BALF.CTL1     2   115   53  135    4   43 1740   12    1    68        8  111
##   BALF.CTL2     0    63   48   82    5  120 1738   10    3    36        4   90
##   BALF.CKO1     1   163   40   23    4   41 2278    9    2    17        1   76
##   BALF.CKO2     1    54   34   60    9   96 1684    4    2    28        4  127
##            preAnno2
## SP.info       AM
##   LUNG.CTL1  260
##   LUNG.CTL2  130
##   LUNG.CKO1  140
##   LUNG.CKO2  126
##   BALF.CTL1  412
##   BALF.CTL2  172
##   BALF.CKO1  246
##   BALF.CKO2  390
round(rowRatio(stat.SP),3)
##            preAnno2
## SP.info     Bcell Tcell    NK  ILC2   BAS   EOS   NEU   pDC   cDC migDC
##   LUNG.CTL1 0.016 0.051 0.188 0.169 0.003 0.018 0.226 0.021 0.021 0.021
##   LUNG.CTL2 0.027 0.062 0.184 0.119 0.005 0.026 0.261 0.025 0.028 0.011
##   LUNG.CKO1 0.030 0.126 0.143 0.072 0.003 0.020 0.326 0.016 0.017 0.011
##   LUNG.CKO2 0.020 0.114 0.168 0.083 0.004 0.025 0.270 0.032 0.023 0.012
##   BALF.CTL1 0.001 0.043 0.020 0.050 0.001 0.016 0.643 0.004 0.000 0.025
##   BALF.CTL2 0.000 0.027 0.020 0.035 0.002 0.051 0.733 0.004 0.001 0.015
##   BALF.CKO1 0.000 0.056 0.014 0.008 0.001 0.014 0.785 0.003 0.001 0.006
##   BALF.CKO2 0.000 0.022 0.014 0.024 0.004 0.039 0.675 0.002 0.001 0.011
##            preAnno2
## SP.info     Monocyte    IM    AM
##   LUNG.CTL1    0.070 0.099 0.096
##   LUNG.CTL2    0.101 0.099 0.051
##   LUNG.CKO1    0.100 0.087 0.050
##   LUNG.CKO2    0.119 0.085 0.045
##   BALF.CTL1    0.003 0.041 0.152
##   BALF.CTL2    0.002 0.038 0.073
##   BALF.CKO1    0.000 0.026 0.085
##   BALF.CKO2    0.002 0.051 0.156
melt.SP <- reshape2::melt(stat.SP)
melt.SP$preAnno2 <- factor(as.character(melt.SP$preAnno2),
                                levels = rev(levels(GEX.pure$preAnno2)))

color.stat2 <- color.pre2
names(color.stat2) <- levels(GEX.pure$preAnno2)

p.SP <- ggplot(melt.SP, aes(SP.info, weight=value, fill=preAnno2)) +
  geom_bar(color="black", width = .7, position = "fill") +
  labs( y = "Proportion of total (%)",title = "",x="") +
  scale_fill_manual(values = color.stat2[rev(levels(GEX.pure$preAnno2))]) +
  scale_y_continuous(expand = c(0,0), n.breaks = 6, labels = c(0,20,40,60,80,100)) + 
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 13.6),
        axis.text.y = element_text(size = 16))+
  guides(fill = guide_legend(reverse = T, title = ""))
p.SP

stat.cnt <- table(GEX.pure@meta.data[,c("cnt","preAnno2")])
stat.cnt
##           preAnno2
## cnt        Bcell Tcell   NK ILC2  BAS  EOS  NEU  pDC  cDC migDC Monocyte   IM
##   LUNG.CTL   112   294  974  759   22  116 1274  122  128    86      445  520
##   LUNG.CKO   139   670  871  434   19  125 1667  133  110    66      615  480
##   BALF.CTL     2   178  101  217    9  163 3478   22    4   104       12  201
##   BALF.CKO     2   217   74   83   13  137 3962   13    4    45        5  203
##           preAnno2
## cnt          AM
##   LUNG.CTL  390
##   LUNG.CKO  266
##   BALF.CTL  584
##   BALF.CKO  636
round(rowRatio(stat.cnt),3)
##           preAnno2
## cnt        Bcell Tcell    NK  ILC2   BAS   EOS   NEU   pDC   cDC migDC Monocyte
##   LUNG.CTL 0.021 0.056 0.186 0.145 0.004 0.022 0.243 0.023 0.024 0.016    0.085
##   LUNG.CKO 0.025 0.120 0.156 0.078 0.003 0.022 0.298 0.024 0.020 0.012    0.110
##   BALF.CTL 0.000 0.035 0.020 0.043 0.002 0.032 0.685 0.004 0.001 0.020    0.002
##   BALF.CKO 0.000 0.040 0.014 0.015 0.002 0.025 0.735 0.002 0.001 0.008    0.001
##           preAnno2
## cnt           IM    AM
##   LUNG.CTL 0.099 0.074
##   LUNG.CKO 0.086 0.048
##   BALF.CTL 0.040 0.115
##   BALF.CKO 0.038 0.118
melt.cnt <- reshape2::melt(stat.cnt)
melt.cnt$preAnno2 <- factor(as.character(melt.cnt$preAnno2),
                                levels = rev(levels(GEX.pure$preAnno2)))

color.stat2 <- color.pre2
names(color.stat2) <- levels(GEX.pure$preAnno2)

p.cnt <- ggplot(melt.cnt, aes(cnt, weight=value, fill=preAnno2)) +
  geom_bar(color="black", width = .7, position = "fill") +
  labs( y = "Proportion of total (%)",title = "",x="") +
  scale_fill_manual(values = color.stat2[rev(levels(GEX.pure$preAnno2))]) +
  scale_y_continuous(expand = c(0,0), n.breaks = 6, labels = c(0,20,40,60,80,100)) + 
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 13.6),
        axis.text.y = element_text(size = 16))+
  guides(fill = guide_legend(reverse = T, title = ""))
p.cnt

new

GEX.pure$rep <- paste0("rep",
                        gsub("LUNG.CTL|LUNG.CKO|BALF.CTL|BALF.CKO","",as.character(GEX.pure$FB.info)))
pumap1 <- DimPlot(GEX.pure, reduction = "umap", label = T, group.by = "preAnno2", cols = color.pre2) +
  DimPlot(GEX.pure, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
pumap1

pumap2 <- DimPlot(GEX.pure, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
        ncol = 4, cols = color.FB)
pumap2

pumap3 <- DimPlot(GEX.pure, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "preAnno2", split.by = "FB.info", 
        ncol = 4, cols = color.pre2)
pumap3

heatmap

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.pure$SP.info,
      clusters=GEX.pure$preAnno2),
                   main = "Cell Count",
                   gaps_row = c(2,4,6),
      #gaps_col = c(4,7,10),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.pure$SP.info,
                                clusters=GEX.pure$preAnno2)),
                   main = "Cell Ratio",
                   gaps_row = c(2,4,6),
      #gaps_col = c(4,7,10),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

barplot.1

stat_preAnno2 <- GEX.pure@meta.data[,c("cnt","rep","preAnno2")]

stat_preAnno2.s <- stat_preAnno2 %>%
  group_by(cnt,rep,preAnno2) %>%
  summarise(count=n()) %>%
  mutate(prop= count/sum(count)) %>%
  ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom.preAnno2 <- stat_preAnno2.s %>%
  ggplot(aes(x = preAnno2, y = 100*prop, color=cnt)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  theme_classic(base_size = 15) +
  scale_color_manual(values = c(color.cnt), name="") +
  labs(title="Cell Composition of Lung immune data, preAnno2", y = "Proportion") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
  
  geom_point(aes(x=preAnno2, y = 100*prop, color= cnt),
             position = position_dodge(0.75),
             shape=16,alpha=0.75,size=2.15,
             stroke=0.15, show.legend=F)

pcom.preAnno2

sig.test

glm.nb - anova.LRT

Sort.N <- c("LUNG.CTL","LUNG.CKO")

glm.nb_anova.preAnno2 <- list()

for(x1 in 1:2){
  for(x2 in 1:2){
    if(x2>x1){
      
      stat_preAnno2.s_N <- stat_preAnno2.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
      
      
      total.N <- stat_preAnno2.s_N %>%
        group_by(cnt, rep) %>%
        summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
      
      rownames(total.N) <- paste0(as.character(total.N$cnt),
                                  "_",
                                  as.character(total.N$rep))
      
      stat_preAnno2.s_N$total <- total.N[paste0(stat_preAnno2.s_N$cnt,
                                            "_",
                                            stat_preAnno2.s_N$rep),"total"]
      
      glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
      
      for(AA in levels(GEX.pure$preAnno2)){
        glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
          aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
          aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
          aaa.anova <- anova(aaa.glmnb, test = "LRT")
          aaa.anova$'Pr(>Chi)'[2]
        }, error=function(e){
        
          tryCatch({
            aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
            aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
            aaa.anova <- anova(aaa.glmnb, test = "LRT")
            aaa.anova$'Pr(>Chi)'[2]
          }, error=function(e){
            
            tryCatch({
              aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
              aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
              aaa.anova <- anova(aaa.glmnb, test = "LRT")
              aaa.anova$'Pr(>Chi)'[2]
            }, error = function(e){
              NA
            })     
          })
        })
      }
      glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]]) 
    }
  }
}
glm.nb_anova.preAnno2_df1 <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.preAnno2)))
rownames(glm.nb_anova.preAnno2_df1) <- names(glm.nb_anova.preAnno2)
colnames(glm.nb_anova.preAnno2_df1) <- gsub("X","C",colnames(glm.nb_anova.preAnno2_df1))
glm.nb_anova.preAnno2_df1
##                        Bcell        Tcell          NK         ILC2       BAS
## LUNG.CTLvsLUNG.CKO 0.5027616 2.101569e-27 0.001200813 1.043013e-05 0.4982111
##                          EOS        NEU       pDC       cDC     migDC
## LUNG.CTLvsLUNG.CKO 0.9613576 0.01760329 0.9439911 0.1242673 0.1516296
##                      Monocyte         IM         AM
## LUNG.CTLvsLUNG.CKO 0.06729544 0.02171049 0.05362246
round(glm.nb_anova.preAnno2_df1,4)
##                     Bcell Tcell     NK ILC2    BAS    EOS    NEU   pDC    cDC
## LUNG.CTLvsLUNG.CKO 0.5028     0 0.0012    0 0.4982 0.9614 0.0176 0.944 0.1243
##                     migDC Monocyte     IM     AM
## LUNG.CTLvsLUNG.CKO 0.1516   0.0673 0.0217 0.0536
Sort.N <- c("BALF.CTL","BALF.CKO")

glm.nb_anova.preAnno2 <- list()

for(x1 in 1:2){
  for(x2 in 1:2){
    if(x2>x1){
      
      stat_preAnno2.s_N <- stat_preAnno2.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
      
      
      total.N <- stat_preAnno2.s_N %>%
        group_by(cnt, rep) %>%
        summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
      
      rownames(total.N) <- paste0(as.character(total.N$cnt),
                                  "_",
                                  as.character(total.N$rep))
      
      stat_preAnno2.s_N$total <- total.N[paste0(stat_preAnno2.s_N$cnt,
                                            "_",
                                            stat_preAnno2.s_N$rep),"total"]
      
      glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
      
      for(AA in levels(GEX.pure$preAnno2)){
        glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
          aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
          aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
          aaa.anova <- anova(aaa.glmnb, test = "LRT")
          aaa.anova$'Pr(>Chi)'[2]
        }, error=function(e){
        
          tryCatch({
            aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
            aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
            aaa.anova <- anova(aaa.glmnb, test = "LRT")
            aaa.anova$'Pr(>Chi)'[2]
          }, error=function(e){
            
            tryCatch({
              aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
              aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
              aaa.anova <- anova(aaa.glmnb, test = "LRT")
              aaa.anova$'Pr(>Chi)'[2]
            }, error = function(e){
              NA
            })     
          })
        })
      }
      glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]]) 
    }
  }
}
glm.nb_anova.preAnno2_df2 <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.preAnno2)))
rownames(glm.nb_anova.preAnno2_df2) <- names(glm.nb_anova.preAnno2)
colnames(glm.nb_anova.preAnno2_df2) <- gsub("X","C",colnames(glm.nb_anova.preAnno2_df2))
glm.nb_anova.preAnno2_df2
##                        Bcell     Tcell         NK       ILC2     BAS       EOS
## BALF.CTLvsBALF.CKO 0.4940555 0.7424084 0.01437989 0.01182911 0.48015 0.6517545
##                          NEU        pDC       cDC       migDC   Monocyte
## BALF.CTLvsBALF.CKO 0.3903129 0.08739859 0.9313036 0.001902429 0.06470806
##                           IM        AM
## BALF.CTLvsBALF.CKO 0.9023446 0.8368056
round(glm.nb_anova.preAnno2_df2,4)
##                     Bcell  Tcell     NK   ILC2    BAS    EOS    NEU    pDC
## BALF.CTLvsBALF.CKO 0.4941 0.7424 0.0144 0.0118 0.4802 0.6518 0.3903 0.0874
##                       cDC  migDC Monocyte     IM     AM
## BALF.CTLvsBALF.CKO 0.9313 0.0019   0.0647 0.9023 0.8368
rbind(glm.nb_anova.preAnno2_df1,
      glm.nb_anova.preAnno2_df2)
##                        Bcell        Tcell          NK         ILC2       BAS
## LUNG.CTLvsLUNG.CKO 0.5027616 2.101569e-27 0.001200813 1.043013e-05 0.4982111
## BALF.CTLvsBALF.CKO 0.4940555 7.424084e-01 0.014379887 1.182911e-02 0.4801500
##                          EOS        NEU        pDC       cDC       migDC
## LUNG.CTLvsLUNG.CKO 0.9613576 0.01760329 0.94399112 0.1242673 0.151629648
## BALF.CTLvsBALF.CKO 0.6517545 0.39031287 0.08739859 0.9313036 0.001902429
##                      Monocyte         IM         AM
## LUNG.CTLvsLUNG.CKO 0.06729544 0.02171049 0.05362246
## BALF.CTLvsBALF.CKO 0.06470806 0.90234460 0.83680561

DEGs

LUNG.CTL/CKO

calculate in individual script

BALF.CTL/CKO

check

#grep("Rxr|Rar",rownames(GEX.pure),value = T)
#VlnPlot(GEX.pure, features = grep("Rxr|Rar",rownames(GEX.seur),value = T),
#        ncol = 2, group.by = "preAnno2", split.by = "cnt", cols = color.cnt[1:4])